{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "import urllib2\n",
    "import csv\n",
    "import os\n",
    "import time\n",
    "from time import sleep"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_page(url):\n",
    "    hdr = {'User-Agent': ' Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:24.0) Gecko/20100101 Firefox/24.0'}\n",
    "    req = urllib2.Request(url, headers=hdr)\n",
    "    try:\n",
    "        page = urllib2.urlopen(req)\n",
    "        content = page.read()\n",
    "        return content\n",
    "    except urllib2.HTTPError, e:\n",
    "        print e.fp.read()\n",
    "        return \"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_next_station(page):     \n",
    "    start_number = page.find('location-map-info-window-title\" href=\"')    \n",
    "    if start_number == -1:         \n",
    "        return None, 0     \n",
    "\n",
    "    start_link = page.find('href=\"', start_number)\n",
    "    start_link = start_link+6\n",
    "    end_link = page.find('\">', start_link)\n",
    "    link = page[start_link:end_link]\n",
    "    link = link.lstrip()\n",
    "    link = link.rstrip()\n",
    "    \n",
    "    # we'll also get a station's brand here\n",
    "    start_station_brand = page.find('<div class=\"location-map-info-window-partners\"> <div class=\"partner\" data-property=\"',end_link)\n",
    "    start_station_brand = start_station_brand + 84\n",
    "    end_station_brand = page.find('\">',start_station_brand)\n",
    "    station_brand = page[start_station_brand:end_station_brand]\n",
    "    station_brand = station_brand.lstrip()\n",
    "    station_brand = station_brand.rstrip()\n",
    "    \n",
    "    return link, station_brand, end_station_brand"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_all_stations(page):    \n",
    "    links = []  \n",
    "    brands =[]\n",
    "    n_stations = 0 \n",
    "    while True:        \n",
    "        results = get_next_station(page)         \n",
    "        if results[0]:            \n",
    "            links.append(results[0])      \n",
    "            brands.append(results[1])\n",
    "            page = page[results[2]:] \n",
    "            n_stations = n_stations + 1\n",
    "            print n_stations\n",
    "        else:\n",
    "            break     \n",
    "    return links, brands, n_stations "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1\n",
      "2\n",
      "3\n",
      "4\n",
      "5\n",
      "6\n",
      "7\n",
      "8\n",
      "9\n",
      "10\n",
      "11\n",
      "12\n",
      "13\n",
      "14\n",
      "15\n",
      "16\n",
      "17\n",
      "18\n",
      "19\n",
      "20\n",
      "21\n",
      "22\n",
      "23\n",
      "24\n",
      "25\n",
      "26\n",
      "27\n",
      "28\n",
      "29\n",
      "30\n",
      "31\n",
      "32\n",
      "33\n",
      "34\n",
      "35\n",
      "36\n",
      "37\n",
      "38\n",
      "39\n",
      "40\n",
      "41\n",
      "42\n",
      "43\n",
      "44\n",
      "45\n",
      "46\n",
      "47\n",
      "48\n",
      "49\n",
      "50\n",
      "51\n",
      "52\n",
      "53\n",
      "54\n",
      "55\n",
      "56\n",
      "57\n",
      "58\n",
      "59\n",
      "60\n",
      "61\n",
      "62\n",
      "63\n",
      "64\n",
      "65\n",
      "66\n",
      "67\n",
      "68\n",
      "69\n",
      "70\n",
      "71\n",
      "72\n",
      "73\n",
      "74\n",
      "75\n",
      "76\n",
      "77\n",
      "78\n",
      "79\n",
      "80\n",
      "81\n",
      "82\n",
      "83\n",
      "84\n",
      "85\n",
      "86\n",
      "87\n",
      "88\n",
      "89\n",
      "90\n",
      "91\n",
      "92\n",
      "93\n",
      "94\n",
      "95\n",
      "96\n",
      "97\n",
      "98\n",
      "99\n",
      "100\n",
      "101\n",
      "102\n",
      "103\n",
      "104\n",
      "105\n",
      "106\n",
      "107\n",
      "108\n",
      "109\n",
      "110\n",
      "111\n",
      "112\n",
      "113\n",
      "114\n",
      "115\n",
      "116\n",
      "117\n",
      "118\n",
      "119\n",
      "120\n",
      "121\n",
      "122\n",
      "123\n",
      "124\n",
      "125\n",
      "126\n",
      "127\n",
      "128\n",
      "129\n",
      "130\n",
      "131\n",
      "132\n",
      "133\n",
      "134\n",
      "135\n",
      "136\n",
      "137\n",
      "138\n",
      "139\n",
      "140\n",
      "141\n",
      "142\n",
      "143\n",
      "144\n",
      "145\n",
      "146\n",
      "147\n",
      "148\n",
      "149\n",
      "150\n",
      "151\n",
      "152\n",
      "153\n",
      "154\n",
      "155\n",
      "156\n",
      "157\n",
      "158\n",
      "159\n",
      "160\n",
      "161\n",
      "162\n",
      "163\n",
      "164\n",
      "165\n",
      "166\n",
      "167\n",
      "168\n",
      "169\n"
     ]
    }
   ],
   "source": [
    "# getting all of the links\n",
    "all_stations_links_page = \"https://www.raststaetten.de/alle-standorte/\"\n",
    "AB_gas_station_links, AB_gas_station_brands, n_AB_stations = get_all_stations(get_page(all_stations_links_page))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/standorte/aachener-land-nord/\n",
      "0\n"
     ]
    }
   ],
   "source": [
    "print AB_gas_station_links[0]\n",
    "print AB_gas_station_links.index('/standorte/aachener-land-nord/')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [],
   "source": [
    "# getting lat + long from a link\n",
    "def get_station_info(link):\n",
    "    page = get_page(link)\n",
    "    \n",
    "    lat_start = page.find('latitude=\"')\n",
    "    lat_start = lat_start +10\n",
    "    lat_end = page.find('\" d',lat_start)\n",
    "    lat = page[lat_start:lat_end]\n",
    "    \n",
    "    long_start = page.find('long',lat_end)\n",
    "    long_start = long_start +11\n",
    "    long_end = page.find('\">',long_start)\n",
    "    long = page[long_start:long_end]    \n",
    "    \n",
    "    return lat, long\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['shell', '50.818', '6.215']\n",
      "['shell', '50.816865', '6.215741']\n",
      "['esso', '47.84951', '10.280821']\n",
      "['aral', '52.317754', '13.496276']\n",
      "['aral', '52.316286', '13.494771']\n",
      "['shell', '49.320493', '8.568246']\n",
      "['shell', '50.892978', '12.938842']\n",
      "['aral', '50.894139', '12.946361']\n",
      "['aral', '52.501357', '13.27712']\n",
      "['aral', '50.300769', '8.236677']\n",
      "['aral', '47.766787', '12.902886']\n",
      "['aral', '49.69', '8.603']\n",
      "['shell', '52.321214', '14.389105']\n",
      "['shell', '52.320393', '14.383616']\n",
      "['shell', '47.970762', '7.721949']\n",
      "['shell', '49.162062', '8.567777']\n",
      "['shell', '52.252078', '12.307421']\n",
      "['shell', '52.250214', '12.306883']\n",
      "['total', '52.883369', '13.704344']\n",
      "['total', '52.883086', '13.702049']\n",
      "['total', '52.186', '11.451']\n",
      "['total', '52.1840432', '11.4507067']\n",
      "['aral', '51.38168', '9.23633']\n",
      "['eEnbw', '47.834109', '12.4027']\n",
      "['shell', '51.237138', '6.513671']\n",
      "['agip', '51.237221', '6.510674']\n",
      "['esso', '50.688908', '8.295883']\n",
      "['aral', '48.588', '13.366']\n",
      "['oMV', '48.59', '13.366']\n",
      "['aral', '51.06322', '13.56221']\n",
      "['aral', '51.06118', '13.56956']\n",
      "['aral', '48.418723', '10.811543']\n",
      "['eEnbw', '51.264922', '7.243754']\n",
      "['total', '50.94981', '11.196016']\n",
      "['total', '50.948458', '11.196053']\n",
      "['total', '50.069527', '6.879181']\n",
      "['shell', '50.069143', '6.881767']\n",
      "['eEon', '54.804014', '9.328616']\n",
      "['esso', '48.983663', '10.195741']\n",
      "['eInnogy', '50.581522', '7.479243']\n",
      "['shell', '52.132662', '12.753397']\n",
      "['shell', '52.132549', '12.751164']\n",
      "['total', '49.241388', '10.355098']\n",
      "['shell', '50.929094', '6.775288']\n",
      "['total', '51.559183', '13.921463']\n",
      "['shell', '51.568589', '13.928012']\n",
      "['shell', '52.422227', '9.557687']\n",
      "['aral', '51.310425', '6.63478']\n",
      "['agip', '49.209265', '6.966944']\n",
      "['esso', '49.206778', '6.965472']\n",
      "['eEnbw', '50.438671', '7.646913']\n",
      "['oMV', '48.605956', '9.632872']\n",
      "['aral', '53.090308', '9.178047']\n",
      "['aral', '52.434341', '13.191749']\n",
      "['eEon', '53.520471', '10.804038']\n",
      "['aral', '51.865909', '8.37158']\n",
      "['esso', '53.484973', '10.025441']\n",
      "['shell', '52.337', '9.865']\n",
      "['eEon', '51.09103', '13.23736']\n",
      "['aral', '53.423729', '9.966288']\n",
      "['total', '53.424391', '9.96239']\n",
      "['shell', '53.055777', '8.501044']\n",
      "['score', '53.054691', '8.501514']\n",
      "['shell', '51.035424', '9.48887']\n",
      "['aral', '51.035977', '9.486519']\n",
      "['agip', '52.220564', '11.053758']\n",
      "['shell', '50.870696', '11.850398']\n",
      "['shell', '50.87499', '11.846279']\n",
      "['shell', '52.127533', '10.051503']\n",
      "['agip', '50.427116', '11.794892']\n",
      "['esso', '47.827929', '12.589252']\n",
      "['esso', '49.691252', '6.896044']\n",
      "['aral', '49.68958', '6.90015']\n",
      "['eEnbw', '47.997891', '11.675564']\n",
      "['esso', '53.716893', '9.939533']\n",
      "['total', '53.717401', '9.937914']\n",
      "['shell', '47.891644', '11.731666']\n",
      "['aral', '53.041081', '8.220615']\n",
      "['shell', '53.045422', '8.220819']\n",
      "['shell', '47.926413', '11.403075']\n",
      "['oMV', '47.927672', '11.401813']\n",
      "['shell', '50.912687', '10.587045']\n",
      "['shell', '51.635923', '6.747347']\n",
      "['aral', '51.636367', '6.745025']\n",
      "['shell', '54.364451', '9.67189']\n",
      "['aral', '54.36398', '9.669951']\n",
      "['shell', '49.964683', '7.768']\n",
      "['aral', '48.124164', '10.112394']\n",
      "['tamoil', '47.861155', '8.786002']\n",
      "['agip', '49.323541', '9.414346']\n",
      "['esso', '49.325833', '9.413054']\n",
      "['esso', '49.272', '11.6']\n",
      "['segaFredo', '51.898061', '6.165097']\n",
      "['aral', '51.267201', '9.518551']\n",
      "['aral', '50.831161', '9.576526']\n",
      "['shell', '49.243994', '8.879833']\n",
      "['total', '51.606076', '12.185403']\n",
      "['total', '51.608724', '12.184917']\n",
      "['eInnogy', '50.899619', '7.151136']\n",
      "['aral', '53.109449', '9.98393']\n",
      "['esso', '50.249804', '8.994793']\n",
      "['esso', '50.249565', '8.99344']\n",
      "['esso', '48.437515', '10.213661']\n",
      "['eInnogy', '51.411589', '7.499144']\n",
      "['shell', '50.395213', '8.077491']\n",
      "['eOther', '50.520453', '8.75529']\n",
      "['aral', '52.755338', '12.853089']\n",
      "['aral', '52.752863', '12.855399']\n",
      "['total', '48.308305', '7.791374']\n",
      "['esso', '52.21576', '11.078349']\n",
      "['shell', '50.095807', '8.353607']\n",
      "['agip', '52.302216', '13.020311']\n",
      "['total', '52.301847', '13.017607']\n",
      "['aral', '50.266', '7.513']\n",
      "['esso', '51.944838', '7.548153']\n",
      "['total', '48.205994', '8.625453']\n",
      "['eEnbw', '50.422807', '7.92821']\n",
      "['shell', '54.067585', '10.755556']\n",
      "['avia', '54.07115', '10.75624']\n",
      "['total', '51.122912', '6.79609']\n",
      "['shell', '49.621704', '9.728917']\n",
      "['aral', '51.157481', '6.962202']\n",
      "['shell', '51.105401', '11.957702']\n",
      "['shell', '51.105341', '11.954837']\n",
      "['shell', '53.300451', '9.534169']\n",
      "['avia', '48.960859', '12.093162']\n",
      "['esso', '49.52722', '8.07359']\n",
      "['shell', '49.811801', '8.578584']\n",
      "['aral', '49.813096', '8.577863']\n",
      "['esso', '51.655803', '11.822067']\n",
      "['eEon', '48.17', '11.448']\n",
      "['eTur', '50.821584', '12.765012']\n",
      "['aral', '53.839627', '12.288354']\n",
      "['aral', '53.840372', '12.290762']\n",
      "['avia', '50.267764', '9.882776']\n",
      "['esso', '50.265135', '9.881093']\n",
      "['esso', '49.943996', '10.020716']\n",
      "['eEnbw', '50.930768', '6.339835']\n",
      "['eInnogy', '50.930488', '6.338083']\n",
      "['eEnbw', '51.28828', '7.56871']\n",
      "['aral', '47.80295', '12.177744']\n",
      "['esso', '53.511299', '10.852944']\n",
      "['esso', '48.050002', '7.810907']\n",
      "['star', '52.542319', '13.691644']\n",
      "['shell', '52.543051', '13.693251']\n",
      "['eEnbw', '50.52291', '7.674062']\n",
      "['aral', '50.880545', '7.935169']\n",
      "['esso', '51.29641', '7.21363']\n",
      "['avia', '51.29594', '7.21448']\n",
      "['total', '53.367929', '11.730466']\n",
      "['total', '53.367017', '11.728317']\n",
      "['agip', '52.640096', '13.24307']\n",
      "['eTur', '50.877326', '11.808342']\n",
      "['aral', '50.418593', '9.734466']\n",
      "['eEnbw', '51.790785', '8.072719']\n",
      "['shell', '51.12473', '6.600183']\n",
      "['agip', '52.950989', '12.662142']\n",
      "['agip', '52.950777', '12.659641']\n",
      "['shell', '50.055744', '8.909382']\n",
      "['shell', '50.055212', '8.907371']\n",
      "['aral', '50.35566', '8.69456']\n",
      "['esso', '52.914932', '8.425141']\n",
      "['aral', '52.913898', '8.426727']\n",
      "['esso', '52.684472', '13.011223']\n",
      "['esso', '52.665298', '13.004641']\n",
      "['shell', '49.647', '8.29']\n",
      "['aral', '49.755737', '9.95931']\n",
      "['aral', '49.750515', '9.963269']\n",
      "['shell', '52.339173', '10.359601']\n"
     ]
    }
   ],
   "source": [
    "import time\n",
    "from random import randint\n",
    "from time import sleep\n",
    "\n",
    "# for each link, get geographic coordinates and brand of station\n",
    "# export everything as a CSV file\n",
    "export_filename = \"/users/danielershov/dropbox/GasStation/Data/Autobahn Stations/all_AB_stations.csv\"\n",
    "\n",
    " \n",
    "# writing to csv file \n",
    "with open(export_filename, 'w') as csvfile: \n",
    "    # creating a csv writer object \n",
    "    csvwriter = csv.writer(csvfile) \n",
    "    \n",
    "    for link in AB_gas_station_links:\n",
    "        sleep(randint(1,3))\n",
    "        full_link = \"https://www.raststaetten.de/\"+link\n",
    "        lat,long = get_station_info(full_link)\n",
    "        row = [AB_gas_station_brands[AB_gas_station_links.index(link)],lat,long]\n",
    "        csvwriter.writerow(row)\n",
    "        print row\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
